#ifndef KERNEL_SCHED_SCHED_H
#define KERNEL_SCHED_SCHED_H

#include <seminix/cpu.h>
#include <seminix/smp.h>
#include <seminix/tick.h>
#include <seminix/dump_stack.h>
#include <seminix/sched.h>
#include <seminix/init.h>
#include <seminix/irqflags.h>
#include <seminix/tcb.h>
#include <seminix/sched/rt.h>

struct rq;

void update_load_add(struct load_weight *lw, unsigned long inc);
void update_load_sub(struct load_weight *lw, unsigned long dec);

/* True for the policies handled by the fair (CFS) class. */
static inline int fair_policy(int policy)
{
    if (policy == SEMINIX_SCHED_NORMAL)
        return 1;
    return policy == SEMINIX_SCHED_BATCH;
}

/* True only for the SCHED_DEADLINE policy. */
static inline int dl_policy(int policy)
{
    if (policy != SEMINIX_SCHED_DEADLINE)
        return 0;
    return 1;
}

/* Does @p's scheduling policy classify as realtime (see rt_policy())? */
static inline int task_has_rt_policy(struct tcb *p)
{
    int policy = p->policy;

    return rt_policy(policy);
}

/* Does @p's scheduling policy classify as deadline (see dl_policy())? */
static inline int task_has_dl_policy(struct tcb *p)
{
    int policy = p->policy;

    return dl_policy(policy);
}

/*
 * Deadline priorities occupy the band below MAX_DL_PRIO; any priority
 * in that band is treated as a deadline priority.
 */
static inline int dl_prio(int prio)
{
    return unlikely(prio < MAX_DL_PRIO) ? 1 : 0;
}

/*
 * Does @p run under SCHED_DEADLINE?
 *
 * NOTE(review): this is purely policy-based, making it identical to
 * task_has_dl_policy() above.  Mainline Linux bases dl_task() on
 * p->prio (via dl_prio()) so that priority-inheritance boosting is
 * taken into account — confirm which semantics are intended here.
 */
static inline int dl_task(struct tcb *p)
{
    return dl_policy(p->policy);
}


/*
 * Increase resolution of nice-level calculations for 64-bit architectures.
 * The extra resolution improves shares distribution and load balancing of
 * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup
 * hierarchies, especially on larger systems. This is not a user-visible change
 * and does not change the user-interface for setting shares/weights.
 *
 * We increase resolution only if we have enough bits to allow this increased
 * resolution (i.e. BITS_PER_LONG > 32). The costs for increasing resolution
 * when BITS_PER_LONG <= 32 are pretty high and the returns do not justify the
 * increased costs.
 */
#if 0 /* BITS_PER_LONG > 32 -- currently broken: it increases power usage under light load  */
# define SCHED_LOAD_RESOLUTION	10
# define scale_load(w)		((w) << SCHED_LOAD_RESOLUTION)
# define scale_load_down(w)	((w) >> SCHED_LOAD_RESOLUTION)
#else
# define SCHED_LOAD_RESOLUTION	0
# define scale_load(w)		(w)
# define scale_load_down(w)	(w)
#endif

/* Fixed-point shift/scale used for load weights (1 << 10 at base resolution). */
#define SCHED_LOAD_SHIFT	(10 + SCHED_LOAD_RESOLUTION)
#define SCHED_LOAD_SCALE	(1L << SCHED_LOAD_SHIFT)

/* Load weight contributed by a nice-0 task. */
#define NICE_0_LOAD		    SCHED_LOAD_SCALE
#define NICE_0_SHIFT		SCHED_LOAD_SHIFT

/*
 * To aid in avoiding the subversion of "niceness" due to uneven distribution
 * of tasks with abnormal "nice" values across CPUs the contribution that
 * each task makes to its run queue's load is weighted according to its
 * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a
 * scaled version of the new time slice allocation that they receive on time
 * slice expiry etc.
 */
/* Load weight and precomputed inverse-weight multiplier for SCHED_IDLE. */
#define WEIGHT_IDLEPRIO                3
#define WMULT_IDLEPRIO         1431655765


/*
 * Single value that decides SCHED_DEADLINE internal math precision.
 * 10 -> just above 1us
 * 9  -> just above 0.5us
 */
#define DL_SCALE (10)

/*
 * Single value that denotes runtime == period, i.e. unlimited time.
 */
#define RUNTIME_INF	((u64)~0ULL)


/*
 * used for enqueue/dequeue task flags
 */
#define ENQUEUE_WAKEUP      1   /* task is being enqueued as part of a wakeup */
#define ENQUEUE_HEAD        2   /* queue at the head instead of the tail */
#ifdef CONFIG_SMP
#define ENQUEUE_WAKING		4	/* sched_class::task_waking was called */
#else
#define ENQUEUE_WAKING		0
#endif
#define ENQUEUE_REPLENISH   8   /* deadline runtime replenishment */

#define DEQUEUE_SLEEP      2    /* task is going to sleep (vs. migration etc.) */

/*
 * wake flags
 */
#define WF_SYNC		0x01		/* waker goes to sleep after wakeup */
#define WF_FORK		0x02		/* child wakeup after fork */

/*
 * Per-policy scheduler class operations.  Classes form a priority-ordered
 * singly linked list via ->next (head is sched_class_highest; see
 * for_each_class() below).
 */
struct sched_class {
    const struct sched_class *next;     /* next lower-priority class */

    /* Add @p to / remove @p from @rq; flags are ENQUEUE_* / DEQUEUE_*. */
    void (*enqueue_task)(struct rq *rq, struct tcb *p, int flags);
    void (*dequeue_task)(struct rq *rq, struct tcb *p, int flags);
    /* Current task voluntarily yields the CPU. */
    void (*yield_task)(struct rq *rq);

    /* Decide whether the woken @p should preempt rq->curr (flags: WF_*). */
    void (*check_preempt_curr)(struct rq *rq, struct tcb *p, int flags);

    /* Choose the next task to run from this class' queue on @rq. */
    struct tcb *(*pick_next_task)(struct rq *rq);
    void (*put_prev_task)(struct rq *rq, struct tcb *p);

#ifdef CONFIG_SMP
    /* Called while @task is waking, before enqueue (cf. ENQUEUE_WAKING). */
    void (*task_waking)(struct tcb *task);
#endif

    /* Re-establish class-internal state for rq->curr. */
    void (*set_curr_task)(struct rq *rq);
    /* Periodic tick handling for @p while it runs on @rq. */
    void (*task_tick)(struct rq *rq, struct tcb *p, int queued);
    void (*task_new) (struct tcb *p);
    void (*task_dead) (struct tcb *p);

    /* Notifications for class changes and priority changes. */
    void (*switched_from) (struct rq *this_rq, struct tcb *task);
    void (*switched_to) (struct rq *this_rq, struct tcb *task);
    void (*prio_changed) (struct rq *this_rq, struct tcb *task, int oldprio);

    /* Round-robin interval for @task, if the class has one. */
    unsigned int (*get_rr_interval) (struct rq *rq, struct tcb *task);
};

extern const struct sched_class stop_sched_class;
extern const struct sched_class dl_sched_class;
extern const struct sched_class rt_sched_class;
extern const struct sched_class fair_sched_class;
extern const struct sched_class idle_sched_class;

/* Walk the classes from highest priority (stop) down the ->next chain. */
#define sched_class_highest (&stop_sched_class)
#define for_each_class(class) \
   for (class = sched_class_highest; class; class = class->next)

/* Global RT bandwidth control: period/runtime pair plus its refresh timer. */
struct rt_bandwidth {
    /* nests inside the rq lock: */
    raw_spinlock_t rt_runtime_lock;
    ktime_t rt_period;              /* length of one accounting period */
    u64     rt_runtime;             /* allowed runtime per period (ns) */
    struct hrtimer		rt_period_timer;    /* fires each period boundary */
};

/* System-wide SCHED_DEADLINE bandwidth parameters (runtime per period). */
struct dl_bandwidth {
    raw_spinlock_t dl_runtime_lock;
    u64 dl_runtime;
    u64 dl_period;
};

/* Deadline bandwidth accounting: @bw is the cap, @total_bw what's admitted. */
struct dl_bw {
    raw_spinlock_t lock;
    u64 bw, total_bw;
};

/* Deadline class' related fields in a runqueue */
struct dl_rq {
    /* runqueue is an rbtree, ordered by deadline */
    struct rb_root rb_root;
    struct rb_node *rb_leftmost;    /* cached earliest-deadline node */

    unsigned long dl_nr_running;    /* number of queued deadline tasks */

    struct dl_bw dl_bw;             /* admitted bandwidth bookkeeping */
};

/*
 * This is the priority-queue data structure of the RT scheduling class:
 * one FIFO list per RT priority, plus a bitmap of non-empty lists for
 * O(1) lookup of the highest populated priority.
 */
struct rt_prio_array {
    DECLARE_BITMAP(bitmap, MAX_RT_PRIO + 1); /* include 1 bit for delimiter */
    struct list_head queue[MAX_RT_PRIO];
};

/* Real-Time classes' related field in a runqueue: */
struct rt_rq {
    struct rt_prio_array active;    /* per-priority FIFO queues */
    unsigned int rt_nr_running;     /* number of queued RT tasks */

    int rt_throttled;               /* set once rt_time exceeds rt_runtime */
    u64 rt_time;                    /* RT time consumed this period */
    u64 rt_runtime;                 /* RT runtime allowed this period */
    /* Nests inside the rq lock: */
    raw_spinlock_t rt_runtime_lock;
};

/* CFS-related fields in a runqueue */
struct cfs_rq {
    struct load_weight load;                    /* aggregate load of queued entities */
    unsigned int nr_running, h_nr_running;

    u64 min_vruntime;       /* monotonic floor for entity vruntimes */
#ifndef CONFIG_64BIT
    /* second copy for lockless reads on 32-bit (torn-read detection) */
    u64 min_vruntime_copy;
#endif

    /* rbtree of runnable entities, ordered by vruntime */
    struct rb_root tasks_timeline;
    struct rb_node *rb_leftmost;    /* cached smallest-vruntime node */

    /*
     * 'curr' points to currently running entity on this cfs_rq.
     * It is set to NULL otherwise (i.e when none are currently running).
     */
    struct sched_entity *curr, *next, *last, *skip;
};

/*
 * This is the main, per-CPU runqueue data structure.
 *
 * Locking rule: those places that want to lock multiple runqueues
 * (such as the load balancing or the thread migration code), lock
 * acquire operations must be ordered by ascending &runqueue.
 */
struct rq {
    /* runqueue lock: */
    spinlock_t lock;

    /*
     * nr_running and cpu_load should be in the same cacheline because
     * remote CPUs use both these fields when doing load calculation.
     */
    unsigned int nr_running;

#define CPU_LOAD_IDX_MAX 5
    /* decaying load averages at several horizons, index 0 = most recent */
    unsigned long cpu_load[CPU_LOAD_IDX_MAX];
    unsigned long last_load_update_tick;

    unsigned long last_sched_tick;  /* jiffies of last tick (rq_last_tick_reset) */
    int skip_clock_update;

    /* capture load from *all* tasks on this cpu: */
    struct load_weight load;
    unsigned long nr_load_updates;
    u64 nr_switches;                /* total context switches on this rq */

    /* per-class sub-runqueues */
    struct cfs_rq cfs;
    struct rt_rq rt;
    struct dl_rq dl;

    /*
     * This is part of a global counter where only the total sum
     * over all CPUs matters. A task can increase this counter on
     * one CPU and if it got migrated afterwards it may decrease
     * it on another CPU. Always updated under the runqueue lock:
     */
    unsigned long nr_uninterruptible;

    struct tcb *curr, *idle, *stop; /* running task and the per-rq idle/stop tasks */

    u64 clock;          /* rq-wide clock, see rq_clock() */
    u64 clock_task;     /* task-time clock, see rq_clock_task() */

#ifdef CONFIG_SMP
    /* cpu of this runqueue: */
    int cpu;

    /* cross-CPU hrtick kick state */
    int hrtick_csd_pending;
    call_single_data_t hrtick_csd;
#endif
    struct hrtimer hrtick_timer;    /* high-resolution preemption tick */
};

/* One runqueue per CPU, cacheline-aligned to avoid false sharing. */
DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);

/* Accessors for per-CPU runqueues and their current tasks. */
#define cpu_rq(cpu)		(&per_cpu(runqueues, (cpu)))
#define this_rq()		this_cpu_ptr(&runqueues)
#define task_rq(p)		cpu_rq(task_cpu(p))
#define cpu_curr(cpu)		(cpu_rq(cpu)->curr)

/* CPU number this runqueue belongs to (always 0 on uniprocessor). */
static inline int cpu_of(struct rq *rq)
{
#ifdef CONFIG_SMP
    return rq->cpu;
#else
    return 0;
#endif
}

/* Current rq-wide clock; only valid under rq->lock after update_rq_clock(). */
static inline u64 rq_clock(struct rq *rq)
{
    return rq->clock;
}

/* Task-runtime clock of @rq; the basis for per-task execution accounting. */
static inline u64 rq_clock_task(struct rq *rq)
{
    return rq->clock_task;
}

/*
 * x-macro pattern: each SCHED_FEAT(name, enabled) entry in features.h
 * expands to an enum constant __SCHED_FEAT_<name>, giving every feature
 * a bit index; __SCHED_FEAT_NR is the total count.
 */
#define SCHED_FEAT(name, enabled)	\
    __SCHED_FEAT_##name ,

enum {
#include "features.h"
    __SCHED_FEAT_NR,
};
#undef SCHED_FEAT
/* Test feature bit x in the sysctl_sched_features bitmask. */
#define sched_feat(x) (sysctl_sched_features & (1UL << __SCHED_FEAT_##x))

static inline u64 global_rt_period(void)
{
    return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
}

/*
 * Global RT runtime sysctl (microseconds) converted to nanoseconds;
 * a negative sysctl value means "no limit" and maps to RUNTIME_INF.
 */
static inline u64 global_rt_runtime(void)
{
    return sysctl_sched_rt_runtime < 0 ?
        RUNTIME_INF : (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
}

/* RT throttling is active unless the runtime sysctl is negative. */
static inline int rt_bandwidth_enabled(void)
{
    return !(sysctl_sched_rt_runtime < 0);
}

/* Is @p the task currently installed as @rq's running task? */
static inline int task_current(struct rq *rq, struct tcb *p)
{
    return p == rq->curr;
}

/*
 * Is @p physically running on a CPU right now?  On SMP this reads
 * p->on_cpu (set/cleared across the context switch, see
 * prepare_lock_switch()/finish_lock_switch()); on UP it degenerates
 * to task_current().
 */
static inline int task_running(struct rq *rq, struct tcb *p)
{
#ifdef CONFIG_SMP
    return p->on_cpu;
#else
    return task_current(rq, p);
#endif
}

/*
 * Architecture context-switch hooks; default to no-ops unless the arch
 * defines its own versions before this header is included.
 */
#ifndef prepare_arch_switch
# define prepare_arch_switch(next)	do { } while (0)
#endif
#ifndef finish_arch_switch
# define finish_arch_switch(prev)	do { } while (0)
#endif
#ifndef finish_arch_post_lock_switch
# define finish_arch_post_lock_switch()	do { } while (0)
#endif

/*
 * Mark @next as running on a CPU before the context switch proper;
 * paired with finish_lock_switch(), which clears prev->on_cpu.
 */
static inline void prepare_lock_switch(struct rq *rq, struct tcb *next)
{
#ifdef CONFIG_SMP
    /*
     * We can optimise this out completely for !SMP, because the
     * SMP rebalancing from interrupt is the only thing that cares
     * here.
     */
    next->on_cpu = 1;
#endif
}

/*
 * Second half of the context switch: publish that @prev no longer runs
 * on this CPU, then drop the rq lock and re-enable interrupts.
 */
static inline void finish_lock_switch(struct rq *rq, struct tcb *prev)
{
#ifdef CONFIG_SMP
    /*
     * After ->on_cpu is cleared, the task can be moved to a different CPU.
     * We must ensure this doesn't happen until the switch is completely
     * finished.
     *
     * The smp_wmb() orders all prior stores of the switch before the
     * on_cpu clear becomes visible to remote CPUs.
     */
    smp_wmb();
    prev->on_cpu = 0;
#endif
    spin_unlock_irq(&rq->lock);
}

/* Record the current jiffies as the time of @rq's last scheduler tick. */
static inline void rq_last_tick_reset(struct rq *rq)
{
    rq->last_sched_tick = jiffies_64;
}

#ifdef CONFIG_SMP
/*
 * double_rq_lock - safely lock two runqueues
 *
 * Note this does not disable interrupts like task_rq_lock,
 * you need to do so manually before calling.
 *
 * The lower-addressed runqueue is always locked first, matching the
 * file-wide locking rule, so concurrent lockers of the same pair
 * cannot deadlock.
 *
 * NOTE(review): spin_lock_nested() is passed subclass 0, i.e. the same
 * lockdep class as the outer acquisition; mainline uses
 * SINGLE_DEPTH_NESTING here so the nested acquire isn't flagged by a
 * lock validator — confirm whether this tree has such annotations.
 */
static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
{
    BUG_ON(!irqs_disabled());
    if (rq1 == rq2) {
        spin_lock(&rq1->lock);
    } else {
        if (rq1 < rq2) {
            spin_lock(&rq1->lock);
            spin_lock_nested(&rq2->lock, 0);
        } else {
            spin_lock(&rq2->lock);
            spin_lock_nested(&rq1->lock, 0);
        }
    }
}

/*
 * double_rq_unlock - safely unlock two runqueues
 *
 * Note this does not restore interrupts like task_rq_unlock,
 * you need to do so manually after calling.
 */
static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
{
    spin_unlock(&rq1->lock);
    if (rq1 == rq2)
        return;         /* single lock was taken; nothing more to drop */
    spin_unlock(&rq2->lock);
}

#else

/*
 * double_rq_lock - safely lock two runqueues
 *
 * Note this does not disable interrupts like task_rq_lock,
 * you need to do so manually before calling.
 *
 * !SMP variant: there is only one runqueue, so both arguments must
 * alias it and a single lock suffices.
 */
static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
{
    BUG_ON(!irqs_disabled());
    BUG_ON(rq1 != rq2);
    spin_lock(&rq1->lock);
}

/*
 * double_rq_unlock - safely unlock two runqueues
 *
 * Note this does not restore interrupts like task_rq_unlock,
 * you need to do so manually after calling.
 *
 * !SMP variant: both arguments must alias the single runqueue.
 */
static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
{
    BUG_ON(rq1 != rq2);
    spin_unlock(&rq1->lock);
}

#endif

extern void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period);

extern void update_rq_clock(struct rq *rq);

extern void hrtick_start(struct rq *rq, u64 delay);

extern void resched_task(struct tcb *p);
extern void resched_cpu(int cpu);

extern void check_preempt_curr(struct rq *rq, struct tcb *p, int flags);

extern unsigned long to_ratio(u64 period, u64 runtime);


extern struct dl_bw *dl_bw_of(int i);

/*
 * Wrap-safe "does time @a come before time @b" for 64-bit clocks:
 * the signed view of the difference handles counter wraparound.
 */
static inline int dl_time_before(u64 a, u64 b)
{
    s64 delta = (s64)(a - b);

    return delta < 0;
}

/*
 * Tells if entity @a should preempt entity @b.
 */
static inline bool
dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b)
{
    return dl_time_before(a->deadline, b->deadline);
}

extern struct dl_bandwidth def_dl_bandwidth;

extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
extern void init_dl_bw(struct dl_bw *dl_b);
extern void init_dl_rq(struct dl_rq *dl_rq, struct rq *rq);
extern void init_dl_task_timer(struct sched_dl_entity *dl_se);


extern struct rt_bandwidth def_rt_bandwidth;

extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq);

extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);


extern void init_cfs_rq(struct cfs_rq *cfs_rq);

/*
 * Use hrtick when enabled by the HRTICK scheduler feature.
 * (No check that the hrtimer subsystem is actually in high-resolution
 * mode is performed here.)
 */
static inline int hrtick_enabled(struct rq *rq)
{
    return sched_feat(HRTICK) ? 1 : 0;
}

#endif /* !KERNEL_SCHED_SCHED_H */
